# Attach quanteda with library() so a missing package stops the script here
# instead of surfacing later as "could not find function".
library(quanteda)

# Manifesto data ----
# Read the classified manifesto data and keep only the rows whose `language`
# is "german". Written as a nested call rather than with `%>%` so the script
# does not rely on a pipe operator being attached by some other package.
dat <- subset(
    readRDS("data/muller2021/data_manifestos_classified.rds"),
    language == "german"
)

# Fix the order of the class labels; values outside these levels become NA.
dat$class <- factor(dat$class, levels = c("Past", "Present", "Future"))

# NOTE(review): as.integer() returns the internal level codes when applied to
# a factor -- confirm `year` is stored as character or numeric in the RDS file.
dat$year <- as.integer(dat$year)

# Build the corpus using the `id` column as document names; duplicated ids are
# accepted because unique_docnames = FALSE.
corp <- corpus(dat, docid_field = "id", unique_docnames = FALSE)

# Tokenise, dropping numbers and URLs, and store the result for later steps.
toks <- tokens(corp, remove_numbers = TRUE, remove_url = TRUE)
saveRDS(toks, "data/tokens_de.rds")

# Sentence-level test data ----
# Load the classified German sentences and give `class` the fixed level order
# Past < Present < Future (values outside these levels become NA).
dat_test <- readRDS(
    file = "data/muller2021/data_sentences_classified_german.rds"
)
dat_test$class <- factor(
    x = dat_test$class,
    levels = c("Past", "Present", "Future")
)

# Corpus keyed by the `id` column; duplicated ids are accepted.
corp_test <- corpus(
    x = dat_test,
    docid_field = "id",
    unique_docnames = FALSE
)

# NOTE(review): tokenised with the default options, i.e. without the
# remove_numbers / remove_url settings used for data/tokens_de.rds --
# confirm this difference is intended.
toks_test <- tokens(x = corp_test)
saveRDS(object = toks_test, file = "data/tokens_test_de.rds")

